Stock Market Prediction

In [1]:
import math,random
import os
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
import quandl
from sklearn import preprocessing
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,StackingRegressor,VotingRegressor
from sklearn.linear_model import LinearRegression,SGDRegressor,BayesianRidge,ARDRegression,PassiveAggressiveRegressor,TheilSenRegressor
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
warnings.filterwarnings('ignore')
In [2]:
# Ticker symbol used by the single-stock cells below.
stock = "MSFT"
# Number of trailing rows held out as the forecast window (3 * 251 = 753).
daysToForecast = 3 * 251
In [3]:
def getStockData(stock):
    """Download the full Quandl 'WIKI/<stock>' dataset for one ticker.

    The API key is taken from the ``QUANDL_API_KEY`` environment variable,
    falling back to whatever is already set on ``quandl.ApiConfig.api_key``.
    Credentials must not be committed with the notebook.

    Parameters
    ----------
    stock : str
        Ticker symbol; it is appended to the ``'WIKI/'`` dataset prefix.

    Returns
    -------
    The object returned by ``quandl.get`` for that dataset.

    Raises
    ------
    RuntimeError
        If no API key is available from either source.
    """
    api_key = os.environ.get("QUANDL_API_KEY") or quandl.ApiConfig.api_key
    if not api_key:
        raise RuntimeError(
            "No Quandl API key configured: set the QUANDL_API_KEY environment variable."
        )
    quandl.ApiConfig.api_key = api_key
    return quandl.get('WIKI/' + stock)
In [4]:
def FormatDataForModel(dataArray):
    """Build the feature frame used by the models from raw adjusted prices.

    Parameters
    ----------
    dataArray : pandas.DataFrame
        Must contain the columns 'Adj. Open', 'Adj. High', 'Adj. Low',
        'Adj. Close' and 'Adj. Volume'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns, in order:
        'Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume', where
        HL_PCT     = (Adj. High  - Adj. Close) / Adj. Close * 100
        PCT_change = (Adj. Close - Adj. Open)  / Adj. Open  * 100
        and every missing value is replaced by the sentinel -99999.
    """
    prices = dataArray[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']]
    # .assign returns a new frame, so nothing is written into a slice of the
    # caller's data (the original column assignments relied on that).
    features = prices.assign(
        HL_PCT=(prices['Adj. High'] - prices['Adj. Close']) / prices['Adj. Close'] * 100.0,
        PCT_change=(prices['Adj. Close'] - prices['Adj. Open']) / prices['Adj. Open'] * 100.0,
    )
    selected = features[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]
    # Sentinel instead of NaN so downstream numpy/scikit-learn calls receive finite values.
    return selected.fillna(-99999)
In [5]:
def PreprocessData(mlData,daysToForecast):
    """Create scaled features/labels and split off the trailing forecast window.

    The label of each row is the 'Adj. Close' value ``forecast_out`` rows later,
    where ``forecast_out = ceil(0.12 * daysToForecast)``. All feature columns
    are standardised together with ``preprocessing.scale`` and then the last
    ``daysToForecast`` rows are separated from the rest.

    NOTE(review): the scaling statistics are computed over every row, including
    the held-out window, before the split.

    Parameters
    ----------
    mlData : pandas.DataFrame
        Feature frame containing at least 'Adj. Close'. It is not modified.
    daysToForecast : int
        Number of trailing rows to hold out; must satisfy
        ``0 < daysToForecast < len(mlData)``.

    Returns
    -------
    list
        ``[X, y, X_data, forecastData]`` where
        X            - scaled features of all rows except the last ``daysToForecast``,
        y            - labels for those same rows,
        X_data       - scaled features of the last ``daysToForecast`` rows,
        forecastData - the last ``daysToForecast`` rows of the labelled frame
                       (input columns plus 'label').

    Raises
    ------
    ValueError
        If ``daysToForecast`` is outside ``(0, len(mlData))``; the negative
        slices below would otherwise silently select the wrong rows.
    """
    if not 0 < daysToForecast < len(mlData):
        raise ValueError(
            "daysToForecast must be between 1 and len(mlData) - 1, got %r" % (daysToForecast,)
        )
    forecast_col = 'Adj. Close'
    forecast_out = int(math.ceil(0.12*daysToForecast))
    # Work on a new frame so the caller's DataFrame does not gain a 'label' column.
    labelled = mlData.assign(label=mlData[forecast_col].shift(-forecast_out))
    # Keyword form: the positional axis argument of DataFrame.drop is not
    # accepted by pandas >= 2.0.
    X = np.array(labelled.drop(columns=['label']))
    X = preprocessing.scale(X)
    X_data = X[-daysToForecast:]
    X = X[:-daysToForecast]
    forecastData = labelled.iloc[-daysToForecast:]
    trainData = labelled.iloc[:-daysToForecast]
    # The NaN labels produced by the shift sit in the last forecast_out rows,
    # which fall inside the held-out window because forecast_out <= daysToForecast.
    y = np.array(trainData['label'])
    return [X, y, X_data, forecastData]
In [6]:
def TrainAndPredict(model,X,y,X_data,test_size=0.2,random_state=None):
    """Fit ``model`` on a random train split, score it, and predict ``X_data``.

    Parameters
    ----------
    model
        Estimator exposing ``fit``, ``score`` and ``predict``.
    X, y
        Features and labels to split into train and test parts.
    X_data
        Features to predict after fitting.
    test_size : float, optional
        Passed to ``train_test_split``; defaults to 0.2 (the previous
        hard-coded value).
    random_state : optional
        Passed to ``train_test_split``. The default ``None`` keeps the previous
        unseeded behaviour; pass an int to make the split repeatable.

    Returns
    -------
    tuple
        ``(accuracy, prediction)`` where ``accuracy`` is
        ``model.score(X_test, y_test)`` (for scikit-learn regressors this is
        the R^2 coefficient of determination, not a classification accuracy)
        and ``prediction`` is ``model.predict(X_data)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model.fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    prediction = model.predict(X_data)
    return accuracy, prediction
In [7]:
def addPredictionToForecast(prediction,forecastData):
    """Return a two-column frame pairing actual closes with predictions.

    The 'Adj. Close' column of ``forecastData`` is kept under the name 'EOD'
    and ``prediction`` is attached as a 'prediction' column. The input frame
    is left as it was.
    """
    eod_only = forecastData[['Adj. Close']].rename(columns={'Adj. Close':'EOD'})
    return eod_only.assign(prediction=prediction[:])
In [8]:
def GraphPredictions(forecastData,stock):
    """Show a plotly line chart of ``forecastData`` titled with ``stock``.

    The frame is handed to ``px.line`` as-is; the axes are labelled
    'Time' and 'Price'.
    """
    layout = {'title': stock, 'xaxis_title': 'Time', 'yaxis_title': 'Price'}
    figure = px.line(forecastData)
    figure.update_layout(**layout)
    figure.show()
In [9]:
def GraphAllData(allData,forecastData,stock):
    """Show the full 'Adj. Close' history together with the predictions.

    Parameters
    ----------
    allData : pandas.DataFrame
        Frame with an 'Adj. Close' column.
    forecastData : pandas.DataFrame
        Frame with a 'prediction' column.
    stock : str
        Used as the chart title.

    The two columns are joined side by side on their indexes (rows present in
    only one of them hold NaN in the other) and drawn with GraphPredictions,
    so both charts share one layout definition instead of a copied one.
    """
    result = pd.concat([allData['Adj. Close'],forecastData['prediction']],axis =1, sort=False)
    GraphPredictions(result, stock)
In [10]:
# Single-stock run for the configured `stock`: download, build features,
# split off the forecast window, fit a LinearRegression and predict.
allData = getStockData(stock)
mlData = FormatDataForModel(allData)
X,y,X_data,forecastData = PreprocessData(mlData,daysToForecast)
model = LinearRegression()
accuracy,prediction=TrainAndPredict(model,X,y,X_data)
# forecastData is rebound to the EOD/prediction frame consumed by the plotting cells.
forecastData = addPredictionToForecast(prediction,forecastData)
In [11]:
# `accuracy` is model.score on the held-out test split; for scikit-learn
# regressors that is the R^2 coefficient of determination.
print(accuracy)
0.9387963607092171
In [12]:
# Chart the forecast window: actual close ('EOD') against 'prediction'.
GraphPredictions(forecastData,stock)
In [13]:
# Chart the whole 'Adj. Close' history with the 'prediction' column alongside.
GraphAllData(allData,forecastData,stock)
In [14]:
# Tickers processed by the multi-stock loops below (each is fetched as 'WIKI/<ticker>').
stock_list = ['AAPL', 'IBM', 'MSFT', 'WMT','AMZN','TSLA','PLUG','GOOGL','FB','CRM']
In [15]:
# Repeat the single-stock LinearRegression pipeline for every ticker in stock_list.
# Note: the loop variable rebinds the module-level `stock`, and allData / mlData /
# forecastData / accuracy hold the values for the last ticker afterwards.
for stock in stock_list:
    print("Stock: ", stock)
    allData = getStockData(stock)
    mlData = FormatDataForModel(allData)
    X,y,X_data,forecastData = PreprocessData(mlData,daysToForecast)
    model = LinearRegression()
    accuracy,prediction=TrainAndPredict(model,X,y,X_data)
    print("Accuracy: ", accuracy)
    forecastData = addPredictionToForecast(prediction,forecastData)
    # Two charts per ticker: forecast window only, then the full history.
    GraphPredictions(forecastData,stock)
    GraphAllData(allData,forecastData,stock)
Stock:  AAPL
Accuracy:  0.9685741518775389
Stock:  IBM
Accuracy:  0.9741786905634083
Stock:  MSFT
Accuracy:  0.9378808291934543
Stock:  WMT
Accuracy:  0.9810602981173371
Stock:  AMZN
Accuracy:  0.9395116369182204
Stock:  TSLA
Accuracy:  0.8730166845810572
Stock:  PLUG
Accuracy:  0.5312982502604664
Stock:  GOOGL
Accuracy:  0.8798453577157589
Stock:  FB
Accuracy:  0.8589564857165498
Stock:  CRM
Accuracy:  0.9387320889221141
In [16]:
# [estimator, display name] pairs compared in the cells below. Every estimator is
# constructed with its default arguments, and the same instances are reused (refit)
# for each stock.
model_list = [[LinearRegression(), "LinearRegression"],
              [SVR(),"SupportVectorRegression"],
              [MLPRegressor(),"MLPRegressor"],
              [SGDRegressor(),"SGDRegressor"],
              [BayesianRidge(),"BayesianRidge"],
              [ARDRegression(),"ARDRegression"],
              [PassiveAggressiveRegressor(),"PassiveAggressiveRegressor"],
              [TheilSenRegressor(),"TheilSenRegressor"]]
In [17]:
# model_results collects (model name, stock, score) tuples;
# stock_dfs collects (stock, frame of actual closes plus one prediction column per model).
model_results = []
stock_dfs = []
for stock in stock_list:
    print("Stock: ", stock)
    allData = getStockData(stock)
    mlData = FormatDataForModel(allData)
    X,y,X_data,forecastData = PreprocessData(mlData,daysToForecast)
    # Start the per-stock frame with the actual closes of the forecast window.
    df_stocks = forecastData[['Adj. Close']]
    df_stocks = df_stocks.rename(columns={'Adj. Close':stock+' Actual'})
    for model,name in model_list:
        # TrainAndPredict draws a new random train/test split on every call, so
        # the models are not scored on identical test rows.
        accuracy,prediction=TrainAndPredict(model,X,y,X_data)
        print("Model: ",name , "  ","Accuracy:", accuracy)
        model_results.append((name,stock,accuracy))
        df_stocks[name] = prediction[:]
    stock_dfs.append((stock,df_stocks))
Stock:  AAPL
Model:  LinearRegression    Accuracy: 0.963862114349609
Model:  SupportVectorRegression    Accuracy: 0.946380960670708
Model:  MLPRegressor    Accuracy: 0.9716762728267445
Model:  SGDRegressor    Accuracy: 0.9691962984277707
Model:  BayesianRidge    Accuracy: 0.970212187076513
Model:  ARDRegression    Accuracy: 0.9690394375918461
Model:  PassiveAggressiveRegressor    Accuracy: 0.9487754261045318
Model:  TheilSenRegressor    Accuracy: 0.9686884000519873
Stock:  IBM
Model:  LinearRegression    Accuracy: 0.9754289620472804
Model:  SupportVectorRegression    Accuracy: 0.9657263915740694
Model:  MLPRegressor    Accuracy: 0.9780476117004043
Model:  SGDRegressor    Accuracy: 0.975001054806989
Model:  BayesianRidge    Accuracy: 0.975059308924458
Model:  ARDRegression    Accuracy: 0.9764315799631875
Model:  PassiveAggressiveRegressor    Accuracy: 0.9693756133475964
Model:  TheilSenRegressor    Accuracy: 0.9758394646095443
Stock:  MSFT
Model:  LinearRegression    Accuracy: 0.931379150950349
Model:  SupportVectorRegression    Accuracy: 0.9367271394232867
Model:  MLPRegressor    Accuracy: 0.947697189953062
Model:  SGDRegressor    Accuracy: 0.9423862845059565
Model:  BayesianRidge    Accuracy: 0.9405684963001817
Model:  ARDRegression    Accuracy: 0.9423234990099147
Model:  PassiveAggressiveRegressor    Accuracy: 0.9083716574557713
Model:  TheilSenRegressor    Accuracy: 0.933162114459452
Stock:  WMT
Model:  LinearRegression    Accuracy: 0.981867782479499
Model:  SupportVectorRegression    Accuracy: 0.974440941250136
Model:  MLPRegressor    Accuracy: 0.9823029221143644
Model:  SGDRegressor    Accuracy: 0.9805737679008852
Model:  BayesianRidge    Accuracy: 0.9799498244721645
Model:  ARDRegression    Accuracy: 0.9821014160784409
Model:  PassiveAggressiveRegressor    Accuracy: 0.9727772851803524
Model:  TheilSenRegressor    Accuracy: 0.9815054884852309
Stock:  AMZN
Model:  LinearRegression    Accuracy: 0.9354401054702912
Model:  SupportVectorRegression    Accuracy: 0.864880224842041
Model:  MLPRegressor    Accuracy: 0.936887937553471
Model:  SGDRegressor    Accuracy: 0.9392484289853777
Model:  BayesianRidge    Accuracy: 0.9333433603468543
Model:  ARDRegression    Accuracy: 0.9371318542069242
Model:  PassiveAggressiveRegressor    Accuracy: 0.9282429490251647
Model:  TheilSenRegressor    Accuracy: 0.9338625583064578
Stock:  TSLA
Model:  LinearRegression    Accuracy: 0.8619381997641306
Model:  SupportVectorRegression    Accuracy: 0.7741168294360717
Model:  MLPRegressor    Accuracy: 0.6086551186786646
Model:  SGDRegressor    Accuracy: 0.8908505261825365
Model:  BayesianRidge    Accuracy: 0.8549338477802866
Model:  ARDRegression    Accuracy: 0.8464181356429826
Model:  PassiveAggressiveRegressor    Accuracy: 0.8379197183908114
Model:  TheilSenRegressor    Accuracy: 0.8702964340175201
Stock:  PLUG
Model:  LinearRegression    Accuracy: 0.5111989126987436
Model:  SupportVectorRegression    Accuracy: 0.2030827737450337
Model:  MLPRegressor    Accuracy: 0.5758932162857366
Model:  SGDRegressor    Accuracy: 0.5287194744997323
Model:  BayesianRidge    Accuracy: 0.5924646534744642
Model:  ARDRegression    Accuracy: 0.4516404326782659
Model:  PassiveAggressiveRegressor    Accuracy: 0.5097387738598759
Model:  TheilSenRegressor    Accuracy: 0.37217128401808164
Stock:  GOOGL
Model:  LinearRegression    Accuracy: 0.879611890306949
Model:  SupportVectorRegression    Accuracy: 0.6872066853958463
Model:  MLPRegressor    Accuracy: 0.5127718432803652
Model:  SGDRegressor    Accuracy: 0.8851156294426503
Model:  BayesianRidge    Accuracy: 0.8910414540745166
Model:  ARDRegression    Accuracy: 0.862330540425629
Model:  PassiveAggressiveRegressor    Accuracy: 0.876933708792387
Model:  TheilSenRegressor    Accuracy: 0.8735729330290156
Stock:  FB
Model:  LinearRegression    Accuracy: 0.8244796908068193
Model:  SupportVectorRegression    Accuracy: 0.6441937043514565
Model:  MLPRegressor    Accuracy: -1.1673737698697688
Model:  SGDRegressor    Accuracy: 0.8303550609768184
Model:  BayesianRidge    Accuracy: 0.8358625698051811
Model:  ARDRegression    Accuracy: 0.8326900020935671
Model:  PassiveAggressiveRegressor    Accuracy: 0.7498091749073397
Model:  TheilSenRegressor    Accuracy: 0.8302227605761003
Stock:  CRM
Model:  LinearRegression    Accuracy: 0.9372274577745896
Model:  SupportVectorRegression    Accuracy: 0.8965336170111107
Model:  MLPRegressor    Accuracy: 0.9421553705187241
Model:  SGDRegressor    Accuracy: 0.9408784074850545
Model:  BayesianRidge    Accuracy: 0.9373091477790547
Model:  ARDRegression    Accuracy: 0.9294230225199727
Model:  PassiveAggressiveRegressor    Accuracy: 0.9274058559793151
Model:  TheilSenRegressor    Accuracy: 0.9346338314516549
In [18]:
# Row labels of the score table, in model_list order.
model_names = [label for _, label in model_list]
# Rows = models, columns = stocks. The frame is created with a float dtype so
# numeric reductions on it (mean, max, idxmax) operate on real floats instead
# of an object-dtype frame of Python objects.
df = pd.DataFrame(index=model_names, columns=stock_list, dtype=float)
# model_results holds (model name, stock, score) tuples.
for model_name, ticker, score in model_results:
    df.at[model_name, ticker] = score
df
Out[18]:
AAPL IBM MSFT WMT AMZN TSLA PLUG GOOGL FB CRM
LinearRegression 0.963862 0.975429 0.931379 0.981868 0.93544 0.861938 0.511199 0.879612 0.82448 0.937227
SupportVectorRegression 0.946381 0.965726 0.936727 0.974441 0.86488 0.774117 0.203083 0.687207 0.644194 0.896534
MLPRegressor 0.971676 0.978048 0.947697 0.982303 0.936888 0.608655 0.575893 0.512772 -1.16737 0.942155
SGDRegressor 0.969196 0.975001 0.942386 0.980574 0.939248 0.890851 0.528719 0.885116 0.830355 0.940878
BayesianRidge 0.970212 0.975059 0.940568 0.97995 0.933343 0.854934 0.592465 0.891041 0.835863 0.937309
ARDRegression 0.969039 0.976432 0.942323 0.982101 0.937132 0.846418 0.45164 0.862331 0.83269 0.929423
PassiveAggressiveRegressor 0.948775 0.969376 0.908372 0.972777 0.928243 0.83792 0.509739 0.876934 0.749809 0.927406
TheilSenRegressor 0.968688 0.975839 0.933162 0.981505 0.933863 0.870296 0.372171 0.873573 0.830223 0.934634
In [19]:
# For every stock column of the score table: [stock, best-scoring model, best score].
highest = [
    [ticker, df[ticker].astype(float).idxmax(), df[ticker].max()]
    for ticker in df.columns
]
df_high = pd.DataFrame(highest, columns=["Stock","Model","Accuracy"])
df_high
Out[19]:
Stock Model Accuracy
0 AAPL MLPRegressor 0.971676
1 IBM MLPRegressor 0.978048
2 MSFT MLPRegressor 0.947697
3 WMT MLPRegressor 0.982303
4 AMZN SGDRegressor 0.939248
5 TSLA SGDRegressor 0.890851
6 PLUG BayesianRidge 0.592465
7 GOOGL BayesianRidge 0.891041
8 FB BayesianRidge 0.835863
9 CRM MLPRegressor 0.942155
In [20]:
# Mean score of each model across all stocks (row-wise mean of the score table).
average = df.mean(axis=1)
In [21]:
# Rank the models from highest to lowest mean score.
average.sort_values(ascending=False)
Out[21]:
BayesianRidge                 0.891074
SGDRegressor                  0.888232
LinearRegression              0.880243
ARDRegression                 0.872953
TheilSenRegressor             0.867396
PassiveAggressiveRegressor    0.862935
SupportVectorRegression       0.789329
MLPRegressor                  0.628871
dtype: float64
In [22]:
# One chart per stock comparing the actual closes with every model's predictions.
# GraphPredictions builds the same figure and layout the copied code here did.
for stock,stock_df in stock_dfs:
    GraphPredictions(stock_df, stock)